{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 特征工程\n",
    "### 1. 对原数据中0值填充均值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导入工具包\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>148</td>\n",
       "      <td>72</td>\n",
       "      <td>35</td>\n",
       "      <td>0</td>\n",
       "      <td>33.6</td>\n",
       "      <td>0.627</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>85</td>\n",
       "      <td>66</td>\n",
       "      <td>29</td>\n",
       "      <td>0</td>\n",
       "      <td>26.6</td>\n",
       "      <td>0.351</td>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>183</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23.3</td>\n",
       "      <td>0.672</td>\n",
       "      <td>32</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>89</td>\n",
       "      <td>66</td>\n",
       "      <td>23</td>\n",
       "      <td>94</td>\n",
       "      <td>28.1</td>\n",
       "      <td>0.167</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>137</td>\n",
       "      <td>40</td>\n",
       "      <td>35</td>\n",
       "      <td>168</td>\n",
       "      <td>43.1</td>\n",
       "      <td>2.288</td>\n",
       "      <td>33</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "0          6                           148              72   \n",
       "1          1                            85              66   \n",
       "2          8                           183              64   \n",
       "3          1                            89              66   \n",
       "4          0                           137              40   \n",
       "\n",
       "   Triceps_skin_fold_thickness  serum_insulin   BMI  \\\n",
       "0                           35              0  33.6   \n",
       "1                           29              0  26.6   \n",
       "2                            0              0  23.3   \n",
       "3                           23             94  28.1   \n",
       "4                           35            168  43.1   \n",
       "\n",
       "   Diabetes_pedigree_function  Age  Target  \n",
       "0                       0.627   50       1  \n",
       "1                       0.351   31       0  \n",
       "2                       0.672   32       1  \n",
       "3                       0.167   21       0  \n",
       "4                       2.288   33       1  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the Pima Indians diabetes CSV and preview its first five rows\n",
    "data = pd.read_csv(filepath_or_buffer='pima-indians-diabetes.csv')\n",
    "data.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 768 entries, 0 to 767\n",
      "Data columns (total 9 columns):\n",
      "pregnants                       768 non-null int64\n",
      "Plasma_glucose_concentration    768 non-null int64\n",
      "blood_pressure                  768 non-null int64\n",
      "Triceps_skin_fold_thickness     768 non-null int64\n",
      "serum_insulin                   768 non-null int64\n",
      "BMI                             768 non-null float64\n",
      "Diabetes_pedigree_function      768 non-null float64\n",
      "Age                             768 non-null int64\n",
      "Target                          768 non-null int64\n",
      "dtypes: float64(2), int64(7)\n",
      "memory usage: 54.1 KB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>count</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>mean</td>\n",
       "      <td>3.845052</td>\n",
       "      <td>120.894531</td>\n",
       "      <td>69.105469</td>\n",
       "      <td>20.536458</td>\n",
       "      <td>79.799479</td>\n",
       "      <td>31.992578</td>\n",
       "      <td>0.471876</td>\n",
       "      <td>33.240885</td>\n",
       "      <td>0.348958</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>std</td>\n",
       "      <td>3.369578</td>\n",
       "      <td>31.972618</td>\n",
       "      <td>19.355807</td>\n",
       "      <td>15.952218</td>\n",
       "      <td>115.244002</td>\n",
       "      <td>7.884160</td>\n",
       "      <td>0.331329</td>\n",
       "      <td>11.760232</td>\n",
       "      <td>0.476951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>min</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.078000</td>\n",
       "      <td>21.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>25%</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>99.000000</td>\n",
       "      <td>62.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>27.300000</td>\n",
       "      <td>0.243750</td>\n",
       "      <td>24.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>50%</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>117.000000</td>\n",
       "      <td>72.000000</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>30.500000</td>\n",
       "      <td>32.000000</td>\n",
       "      <td>0.372500</td>\n",
       "      <td>29.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>75%</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>140.250000</td>\n",
       "      <td>80.000000</td>\n",
       "      <td>32.000000</td>\n",
       "      <td>127.250000</td>\n",
       "      <td>36.600000</td>\n",
       "      <td>0.626250</td>\n",
       "      <td>41.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>max</td>\n",
       "      <td>17.000000</td>\n",
       "      <td>199.000000</td>\n",
       "      <td>122.000000</td>\n",
       "      <td>99.000000</td>\n",
       "      <td>846.000000</td>\n",
       "      <td>67.100000</td>\n",
       "      <td>2.420000</td>\n",
       "      <td>81.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "count  768.000000                    768.000000      768.000000   \n",
       "mean     3.845052                    120.894531       69.105469   \n",
       "std      3.369578                     31.972618       19.355807   \n",
       "min      0.000000                      0.000000        0.000000   \n",
       "25%      1.000000                     99.000000       62.000000   \n",
       "50%      3.000000                    117.000000       72.000000   \n",
       "75%      6.000000                    140.250000       80.000000   \n",
       "max     17.000000                    199.000000      122.000000   \n",
       "\n",
       "       Triceps_skin_fold_thickness  serum_insulin         BMI  \\\n",
       "count                   768.000000     768.000000  768.000000   \n",
       "mean                     20.536458      79.799479   31.992578   \n",
       "std                      15.952218     115.244002    7.884160   \n",
       "min                       0.000000       0.000000    0.000000   \n",
       "25%                       0.000000       0.000000   27.300000   \n",
       "50%                      23.000000      30.500000   32.000000   \n",
       "75%                      32.000000     127.250000   36.600000   \n",
       "max                      99.000000     846.000000   67.100000   \n",
       "\n",
       "       Diabetes_pedigree_function         Age      Target  \n",
       "count                  768.000000  768.000000  768.000000  \n",
       "mean                     0.471876   33.240885    0.348958  \n",
       "std                      0.331329   11.760232    0.476951  \n",
       "min                      0.078000   21.000000    0.000000  \n",
       "25%                      0.243750   24.000000    0.000000  \n",
       "50%                      0.372500   29.000000    0.000000  \n",
       "75%                      0.626250   41.000000    1.000000  \n",
       "max                      2.420000   81.000000    1.000000  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "pregnants                         0\n",
      "Plasma_glucose_concentration      5\n",
      "blood_pressure                   35\n",
      "Triceps_skin_fold_thickness     227\n",
      "serum_insulin                   374\n",
      "BMI                              11\n",
      "Diabetes_pedigree_function        0\n",
      "Age                               0\n",
      "Target                            0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Zeros in these five features (plasma glucose concentration, diastolic blood\n",
    "# pressure, triceps skin-fold thickness, serum insulin, BMI) are treated as\n",
    "# placeholders for missing measurements and are re-coded as NaN.\n",
    "nan_number = ['Plasma_glucose_concentration','blood_pressure','Triceps_skin_fold_thickness','serum_insulin','BMI']\n",
    "# Use np.nan: the np.NaN alias was removed in NumPy 2.0 and raises AttributeError there.\n",
    "data[nan_number] = data[nan_number].replace(0, np.nan)\n",
    "# Print the per-column count of missing values after the replacement.\n",
    "print(data.isnull().sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>Triceps_skin_fold_thickness_miss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>29.0</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>29.15342</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>23.0</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Triceps_skin_fold_thickness  Triceps_skin_fold_thickness_miss\n",
       "0                         35.0                           0.00000\n",
       "1                         29.0                           0.00000\n",
       "2                          NaN                          29.15342\n",
       "3                         23.0                           0.00000\n",
       "4                         35.0                           0.00000"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
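    "# (Disabled) For a feature with many NaN values, build a companion '_miss' column holding the column mean (not the median) where the value is missing and 0 otherwise\n",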
    "#data['Triceps_skin_fold_thickness_miss'] = data['Triceps_skin_fold_thickness'].apply(lambda x:np.mean(data['Triceps_skin_fold_thickness']) if pd.isnull(x) else 0)\n",
    "#data[['Triceps_skin_fold_thickness','Triceps_skin_fold_thickness_miss']].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(<matplotlib.axes._subplots.AxesSubplot at 0x6fe131fc08>,\n",
       " <matplotlib.axes._subplots.AxesSubplot at 0x6fe131fc08>)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZoAAAEHCAYAAACX/oD+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de7xVdZ3/8ddbDogXJuWioWBYmhNoohBaqXkrkclbmT+YyXupv0GzmeRXTTNqpL9fjZVdnLEoUUxDHfOCjZq3HLJGERQQUBMvo0dQbioYInL4/P74fjdne9znsIGz9j6X9/Px2I+99nd911rftfb3uz5rrb32dykiMDMzK8pW9S6AmZl1bQ40ZmZWKAcaMzMrlAONmZkVyoHGzMwK1VDvArSn/v37x5AhQ+pdDOuiZs2atSwiBtR6ua7XVqRa1OsuFWiGDBnCzJkz610M66Ik/U89lut6bUWqRb32pTMzMyuUA42ZmRXKgcbMzArVpX6j6areeecdGhsbWbNmTb2L0qbevXszaNAgevbsWe+imNVUZ2ij9WyfDjSdQGNjI3369GHIkCFIqndxKooIli9fTmNjI7vvvnu9i2NWUx29jda7ffrSWSewZs0a+vXr1yErcIkk+vXr16GP6MyK0tHbaL3bpwNNJ9FRK3C5zlBGs6J09Ppfz/I50JiZWaH8G00ntXz5co444ggAXnnlFXr06MGAAenPvTNmzKBXr17tvszHHnuMJUuWMHr06Haft1lX4zbazIGmAxgx4VpmXXbKJk3Tr18/Zs+eDcDFF1/M9ttvzwUXXLBh/IKXljF0cP9Wp29qaqJHjx6btMzHHnuMefPmdbhK3N2NmHBtXZa7qXW2u9lYG92YrtRGfemsCzrmmGP4wpgjGDZsGL/85S8BWLduHTvssAP//M//zKhRo5gxYwbTpk1jr7324uCDD+a8887j+OOPB+DNN9/ktNNOY9SoUey3337ccccdvPXWW0ycOJHrr7+e4cOHc/PNN9dzFc06tWOOOYYRI0Z0mzZa2BmNpMnAZ4ElEbF3TrsR2Ctn2QF4PSKGV5j2BWAV0ASsi4iRRZWzK5oyZQqv/GU9Q/pty8iRI/n85z9Pnz59eOONN9h///255JJLWL16NR/+8If54x//yG677cZJJ520YfqJEycyevRorrnmGl577TUOOOAA5s6dy4UXXsi8efP40Y9+VMe1M+v8pkyZQt++fVm9enW3aKNFntFcA7zr/C0i/ldEDM/B5TfALW1Mf1jO6yCziS6//HJOOOpQPv7xj9PY2Mizzz4LQK9evTjhhBMAWLBgAXvttRcf+MAHkMS4ceM2TH/PPfdw6aWXMnz4cA477DDWrFnDiy++WJd16egk9ZY0Q9IcSfMlfTunXyPpeUmz82t4Tpekn0haKGmupP3ruwZWD5dffjn77rtvt2mjhZ3RRMR0SUMqjVO6z+4k4PCilt9d3XfffUyfPp2pt9/F/nsO5qCDDtpw7/w222yz4RbHiGh1HhHBbbfdxoc+9KF3pU+fPr24gndebwOHR8SbknoCD0m6K4+bEBEtr18cDeyZXwcAV+Z36yZKbfThhx9mm2226RZttF6/0RwMvBoRz7QyPoB7JM2SdFYNy9XpvfHGG/Tt25fevbdh/vz5PProoxXzDRs2jKeffpqXXnqJiODGG2/cMO6oo47iJz/5yYbPjz/+OAB9+vRh1apVxa5AJxPJm/ljz/xqfQ8BxwHX5ukeBnaQNLDoclrHUWqj22zTfdpovQLNOGBqG+M/GRH7k47+xks6pLWMks6SNFPSzKVLl7Z3OTudv/mbv2H16tWccNShTJw4kQMOqHywvO2223LFFVdw5JFHcvDBB7PLLrvwvve9D4CLLrqI1atXs88++zBs2DAuvvhiAA4//HDmzJnDfvvt16F+aKw3ST0kzQaWAPdGxCN51KX58tjlkrbOabsC
L5VN3pjTWs7T9bqLKrXRfffdt9u00Zrf3iypAfgcMKK1PBGxKL8vkXQrMAqoeE4YEZOASQAjR45s60iyyypVMkgd5/3ud7+reHvz66+//q7PRx55JE8//TQRwdlnn83IkennsO22245f/OIX71nOgAED/ACuCiKiCRguaQfgVkl7A98EXgF6kern14GJQKW/Z7+n3rpedy2V2mglXbWN1uOM5kjgqYhorDRS0naS+pSGgc8A82pYvm7jyiuvZPjw4QwdOpS33nqLL3/5y/UuUqcWEa8DDwKjI2Jxvjz2NnA16WAJ0hnM4LLJBgGLalpQ6zS6Shst8vbmqcChQH9JjcBFEXEVMJYWl80k7QL8MiLGADuTjgpL5ft1RNxdVDm7swkTJjBhwoR6F6NTkzQAeCciXpe0DelA6nuSBkbE4nzjy/E0HyxNA86VdAPpJoA3ImJxXQpvHV5XaaNF3nU2rpX00yqkLQLG5OHngH2LKpdZOxsITJHUg3SF4KaI+K2kB3IQEjAbOCfnv5NU1xcCq4HT61Bms5pyFzTdzMa6prFNExFzgf0qpFe8dT/SPavjiy6XWUfiLmjMzKxQDjRmZlYoXzrrhNq7t95qe+G9++67Of/882lqauJLX/oS3/jGN9q1HGZdhdvou/mMxqrS1NTE+PHjueuuu1iwYAFTp05lwYIF9S6WmWUduY060FhVZsyYwR577MEHP/hBevXqxdixY7n99tvrXSwzyzpyG3Wgsaq8/PLLDB7c/D/DQYMG8fLLL9exRGZWriO3UQcaq0qlnmRLvcyaWf115DbqQGNVGTRoEC+91NwXZGNjI7vssksdS2Rm5TpyG3Wgsap87GMf45lnnuH5559n7dq13HDDDRx77LH1LpaZZR25jfr25k6omlsdW+sBYHN7BmhoaOCKK67gqKOOoqmpiTPOOINhw4Zt8nzMuoNqb0duTx25jTrQWNXGjBnDmDFj6l0MM2tFR22jvnTWDkZMuLbd/6DVlgUvLavZsszMtpQDjZmZFcqBxszMCuVAY2ZmhXKgMTOzQjnQmJlZoXx7cye0/dWH8eLG8kDFPJXSd7vwiaqWe8YZZ/Db3/6WnXbaiXnz5lU1jVl39OLEfdp1ftW00Y7cPn1GY1U77bTTuPvuu+tdDDOroCO3z8ICjaTJkpZImleWdrGklyXNzq+K/yySNFrS05IWSuoYT+4xDjnkEPr27VvvYphZBR25fRZ5RnMNMLpC+uURMTy/7mw5UlIP4N+Ao4GhwDhJQwssp9lmk9Rb0gxJcyTNl/TtnL67pEckPSPpRkm9cvrW+fPCPH5IPctvVguFBZqImA6s2IxJRwELI+K5iFgL3AAc166FM2s/bwOHR8S+wHBgtKQDge+RDqr2BF4Dzsz5zwRei4g9gMtzPrMurR6/0ZwraW6+tLZjhfG7Ai+VfW7MaRVJOkvSTEkzly5d2t5l3WS17o7G6iuSN/PHnvkVwOHAzTl9CnB8Hj4ufyaPP0Id5aEhZgWpdaC5EvgQ6chvMfCDCnkqNbr3PtGnNCJiUkSMjIiRAwYMaJ9Smm0CST0kzQaWAPcCzwKvR8S6nKX8YGnDgVQe/wbQr8I8O9QBlNmWqOntzRHxamlY0i+A31bI1ggMLvs8CFhUcNE6lTdP//2Grv439XEAm/uYAIBx48bx4IMPsmzZMgYNGsS3v/1tzjzzzI1P2MVFRBMwXNIOwK3ARyply+9VHUhFxCRgEsDIkSNbPdCyjqnavwy0p47cPmsaaCQNjIjF+eMJQKWbvR8F9pS0O/AyMBb42xoV0dowderUehehQ4uI1yU9CBwI7CCpIZ+1lB8slQ6kGiU1AO9j837LNHuXjtw+i7y9eSrw38BekholnQn8q6QnJM0FDgP+IefdRdKdsOFywrnA74AngZsiYn5R5TTbEpIG5DMZJG0DHEmqt78HTszZTgVuz8PT8mfy+Aei0sPezbqQws5oImJcheSrWsm7CBhT9vlO4D23
Ppt1QAOBKfm2/K1IB0a/lbQAuEHSJcDjNNf9q4BfSVpIOpMZW49Cm9WSu6DpJCKCjn5zUnc8MI+IucB+FdKfI92q3zJ9DfCFGhTNaqyjt9F6tk93QdMJ9O7dm+XLl3foHXlEsHz5cnr37l3vopjVXEdvo/Vunz6j6QQGDRpEY2MjpdtcX3ntTfTme4fLbWp6e+jduzeDBg0qZN5mHVnLNtoR1bN9OtB0Aj179mT33Xff8PmLE65l1mWnvGe43Kamm9nma9lG7d186czMzArlQGNmZoVyoOmG2vuhTGZmbXGgMTOzQjnQmJlZoRxozMysUA40ZmZWKAcaMzMrlAONmZkVyoHGzMwK5UBjZmaFcqAxM7NCOdCYmVmhHGg2YsSEa+tdhC3WFdbBzDovBxozMyuUA42ZmRWqsEAjabKkJZLmlaVdJukpSXMl3Spph1amfUHSE5JmS5pZVBnNtpSkwZJ+L+lJSfMlnZ/TL5b0cq7DsyWNKZvmm5IWSnpa0lH1K71ZbRR5RnMNMLpF2r3A3hHxUeDPwDfbmP6wiBgeESMLKp9Ze1gHfC0iPgIcCIyXNDSPuzzX4eERcSdAHjcWGEZqH/8uqUc9Cm5WK4UFmoiYDqxokXZPRKzLHx8G/IB569QiYnFEPJaHVwFPAru2MclxwA0R8XZEPA8sBEYVX1Kz+qnnbzRnAHe1Mi6AeyTNknRWWzORdJakmZJmLl26tN0LaVYtSUOA/YBHctK5+TLxZEk75rRdgZfKJmukQmByvbaupC6BRtK3SJccrm8lyycjYn/gaNKliENam1dETIqIkRExcsCAAQWU1mzjJG0P/Ab4akSsBK4EPgQMBxYDPyhlrTB5vCfB9dq6kJoHGkmnAp8F/i4i3tPAACJiUX5fAtyKLy1YByapJynIXB8RtwBExKsR0RQR64Ff0FyHG4HBZZMPAhbVsrxmtVbTQCNpNPB14NiIWN1Knu0k9SkNA58B5lXKa1ZvkgRcBTwZET8sSx9Ylu0EmuvwNGCspK0l7Q7sCcyoVXnN6qGhqBlLmgocCvSX1AhcRLrLbGvg3tQ+eTgizpG0C/DLiBgD7Azcmsc3AL+OiLuLKqfZFvokcDLwhKTZOe2fgHGShpMui70AnA0QEfMl3QQsIF0+Hh8RTTUvtVkNFRZoImJcheSrWsm7CBiTh58D9i2qXGbtKSIeovLvLne2Mc2lwKWFFcqsg3HPAJtpS/oPGzHhWvc/ZmbdhgONmZkVyoHGzMwK5UBjZmaFcqAxM7NCOdCYmVmhHGjMzKxQDjRmZlYoBxozMyuUA42ZmRXKgcbMzArlQFOgLe1qpuhuaoqYv7vWMbOWHGjMzKxQDjRmZlYoBxozMyuUA42ZmRXKgcbMzArlQGNmZoWqKtBIur+aNLPO7Igjjqgqzcw2TUNbIyX1BrYF+kvakeZno/8VsEvBZTOriTVr1rB69WqWLVvGa6+9RkQAsHLlShYtWlTn0pl1fhs7ozkbmAX8dX4vvW4H/m1jM5c0WdISSfPK0vpKulfSM/l9x1amPTXneUbSqdWukNmm+vnPf86IESN46qmnGDFixIbXcccdx/jx49ucVtJgSb+X9KSk+ZLOz+kV67mSn0haKGmupP1rsIpmddVmoImIH0fE7sAFEfHBiNg9v/aNiCuqmP81wOgWad8A7o+IPYH78+d3kdQXuAg4ABgFXNRaQDLbUueffz7PP/883//+93nuued4/vnnef7555kzZw7nnnvuxiZfB3wtIj4CHAiMlzSU1uv50cCe+XUWcGUR62TWkbR56awkIn4q6RPAkPJpIqLN/kYiYrqkIS2SjwMOzcNTgAeBr7fIcxRwb0SsAJB0LylgTa2mvGab47zzzuNPf/oTL7zwAuvWrduQfsopp7Q6TUQsBhbn4VWSngR2pfV6fhxwbaTrcw9L2kHSwDwfsy6p2psBfgV8HzgI+Fh+jdzMZe5calT5facKeXYFXir73JjTKpXtLEkzJc1c
unTpZhap4/fRVU356rUOHX3bVevkk0/mggsu4KGHHuLRRx/l0UcfZebMmVVPnw+q9gMeofV6XlXdbq96bdYRVHVGQwoqQ6P0K2nxVCGt4rIjYhIwCWDkyJG1Kp91QTNnzmTBggVIlapf2yRtD/wG+GpErGxjHlXVbddr60qq/R/NPOD97bTMVyUNBMjvSyrkaQQGl30eBPj2HyvU3nvvzSuvvLLJ00nqSQoy10fELTm5tXruum3dTrVnNP2BBZJmAG+XEiPi2M1Y5jTgVOC7+f32Cnl+B/zfshsAPgN8czOWZVa1ZcuWMXToUEaNGsXWW2+9IX3atGmtTqN06nIV8GRE/LBsVGv1fBpwrqQbSDe7vOHfZ6yrqzbQXLw5M5c0lfSDaH9JjaQ7yb4L3CTpTOBF4As570jgnIj4UkSskPQd4NE8q4mlGwPMinLxxRdvzmSfBE4GnpA0O6f9E63Uc+BOYAywEFgNnL4FRTbrFKq96+y/NmfmETGulVHv+bt1RMwEvlT2eTIweXOWa7Y5PvWpT23yNBHxEJV/d4HK9TyAtv+cY9bFVBVoJK2i+QfLXkBP4C8R8VdFFcys1vr06bPhRoC1a9fyzjvvsN1227Fy5co6l8ysc6v2jKZP+WdJx5P+SGnWZaxatepdn2+77TZmzJhRp9KYdR2b1XtzRNwGHN7OZTHrUI4//ngeeOCBehfDrNOr9tLZ58o+bkX6X43v7bcu5ZZbbtkwvH79embOnLlZ/6kxs3er9q6zY8qG1wEvkLrSMOsy7rjjjg3DDQ0NDBkyhNtvr3T3vZltimp/o/EtmJ3QixP3YbcLn9jiPN3F1VdfXe8imHVJ1V46GwT8lPSfgQAeAs6PiMYCy2ZWU42NjZx33nn88Y9/RBIHHXQQP/7xjxk0aFC9i9YhvThxn7ot2wdHnUu1NwNcTfpH8y6kDgDvyGlmXcbpp5/Osccey6JFi3j55Zc55phjOP10n8ybbalqA82AiLg6Itbl1zXAgALLZVZzS5cu5fTTT6ehoYGGhgZOO+003HOy2ZarNtAsk/RFST3y64vA8iILZlZr/fv357rrrqOpqYmmpiauu+46+vXrV+9imXV61QaaM4CTgFdID3k6EffRZF3M5MmTuemmm3j/+9/PwIEDufnmm32DgFk7qPb25u8Ap0bEa7DhUcvfJwUgsy7hX/7lX5gyZQo77pg6DV+xYgUXXHABkye7yz2zLVHtGc1HS0EGIPekvF8xRTKrj7lz524IMgB9+/bl8ccfr2OJzLqGagPNVmXPhimd0VR7NmTWKaxfv57XXttwPMWKFStYt25dHUtk1jVUGyx+APxJ0s2k/9GcBFxaWKnM6uBrX/san/jEJzjxxBORxE033cS3vvWtehfLrNOrtmeAayXNJHWkKeBzEbGg0JKZ1dgpp5zCyJEjeeCBB4gIbrnlFoYOHVrvYpl1elVf/sqBxcHFurShQ4c6uJi1M//OUiMjJlwLwKzLTqlzSapT3gdaqauRE1ZN4NY+lwHv7QKkuTuSCRvSOts6m1kxNut5NGZmZtWqeaCRtJek2WWvlZK+2iLPoZLeKMtzYa3LaWZm7aPmgSYino6I4RExHBgBrAZurZD1D6V8ETGxtqU0q46kyZKWSJpXlnaxpJfLDpTGlI37pqSFkp6WdFR9Sm1WW/W+dHYE8GxE/E+dy2G2ua4BRldIv7zsQOlOAElDgbHAsDzNv0vqUbOSmtVJvQPNWGBqK+M+LmmOpLskDatlocyqFRHTgRVVZj8OuCEi3o6I54GFwKjCCmfWQdQt0EjqBRwL/EeF0Y8BH4iIfUkPXLutjfmcJWmmpJnu0t06kHMlzc2X1kq9auwKvFSWpzGnvYfrtXUl9TyjORp4LCJebTkiIlZGxJt5+E6gp6T+lWYSEZMiYmREjBwwwI/IsQ7hSuBDwHBSb+c/yOmqkDcqzcD12rqSegaacbRy2UzS+yUpD48ildPPv7FOISJejYimiFgP
/ILmy2ONwOCyrIOARbUun1mt1SXQSNoW+DRwS1naOZLOyR9PBOZJmgP8BBgbERWP/Mw6GkkDyz6eAJTuSJsGjJW0taTdgT2BGbUun1mt1aVngIhYDfRrkfazsuErgCtqXS6zTSVpKnAo0F9SI3ARcKik4aTLYi8AZwNExHxJN5G6cloHjI+IpnqU26yW6n3XmW2m5i5fqkvfFKWuY9pLe5Spo4qIcRExMCJ6RsSgiLgqIk6OiH0i4qMRcWxELC7Lf2lEfCgi9oqIu+pZdrNacV9nZtattPeBVLW6c59/PqMxM7NCOdCYmVmhHGjMzKxQDjRmZlYoBxozMyuUA42ZmRXKgcbMzArlQGNmZoVyoDEzs0I50JiZWaG6XaAp735iU7uiqEXXFSMmXFtIP2abO637PTOzLdXtAo2ZmdWWA42ZmRXKgcbMzArlQGNmZoVyoDEzs0I50JiZWaEcaMzMrFB1CzSSXpD0hKTZkmZWGC9JP5G0UNJcSfvXo5xmbZE0WdISSfPK0vpKulfSM/l9x5zuOm3dUr3PaA6LiOERMbLCuKOBPfPrLODKmpbMrDrXAKNbpH0DuD8i9gTuz5/Bddq6qXoHmrYcB1wbycPADpIG1rtQZuUiYjqwokXyccCUPDwFOL4s3XXaup16BpoA7pE0S9JZFcbvCrxU9rkxp72LpLMkzZQ0c+nSpRUXVIuuY4pSTZct7dGtS3t2x/PixH3eU6YRE67drO+hk3ZZs3NELAbI7zvl9KrqtFlXU89A88mI2J90OWG8pENajFeFaeI9CRGTImJkRIwcMGBAEeU0ay9V1Wmo7gDKrLOoW6CJiEX5fQlwKzCqRZZGYHDZ50HAotqUzmyLvFq6JJbfl+T0quu0D6CsK6lLoJG0naQ+pWHgM8C8FtmmAafkO3UOBN4oXY4w6+CmAafm4VOB28vSXaet22mo03J3Bm6VVCrDryPibknnAETEz4A7gTHAQmA1cHqdymrWKklTgUOB/pIagYuA7wI3SToTeBH4Qs7uOm3dUl0CTUQ8B+xbIf1nZcMBjK9lucw2VUSMa2XUERXyuk5bt9SRb282M7MuoF6XzszMupV63qq/24VP1G3Z4DMaMzMrmAONmZkVyoHGzMwK5UBjZmaFcqCpYHP7RtvUH/vK83fSPr02y+b2e2ZmnZMDjZmZFcqBxszMCuVAY2ZmhXKgMTOzQjnQmJlZoRxozMysUA40ZmZWKAcaMzMrlAONmZkVyoHGzMwK5UBTkM7YpUw9y/zixH02dE1TKke13dRsan4zqy0HGjMzK5QDjZmZFarmgUbSYEm/l/SkpPmSzq+Q51BJb0ianV8X1rqcZltK0guSnsh1eGZO6yvpXknP5Pcd611Os6LV44xmHfC1iPgIcCAwXtLQCvn+EBHD82tibYto1m4Oy3V4ZP78DeD+iNgTuD9/NuvSah5oImJxRDyWh1cBTwK71rocZnVyHDAlD08Bjq9jWcxqoq6/0UgaAuwHPFJh9MclzZF0l6RhbczjLEkzJc1cunRpQSU12ywB3CNplqSzctrOEbEY0kEXsFOlCV2vrSupW6CRtD3wG+CrEbGyxejHgA9ExL7AT4HbWptPREyKiJERMXLAgAHFFdhs030yIvYHjiZdIj6k2gldr60rqUugkdSTFGSuj4hbWo6PiJUR8WYevhPoKal/jYtptkUiYlF+XwLcCowCXpU0ECC/L6lfCc1qox53nQm4CngyIn7YSp7353xIGkUq5/LaldJsy0jaTlKf0jDwGWAeMA04NWc7Fbi9PiU0q52GOizzk8DJwBOSZue0fwJ2A4iInwEnAv9b0jrgLWBsREQdymq2uXYGbs3HSw3AryPibkmPAjdJOhN4EfhCHctoVhM1DzQR8RCgjeS5AriiNiUya38R8Rywb4X05cARtS+RWf102Z4BtrTfqxcn7rPRvr+qyVPKV2m4s6t2/duavtS/2ca07ANtU5ZbPn/3h2ZWe1020JiZWcfg
QGNmZoVyoDEzs0I50JiZWaEcaMzMrFAONGZmVigHGjMzK5QDjZmZFcqBxszMCuVAY2ZmhepSgebJxuWb3CVKy+5hKk2/pV2tWH1sSvdALbum6ardBpnVQ5cKNGZm1vE40JiZWaEcaMzMrFAONGZmVigHGjMzK5QDjZmZFcqBxszMCuVAY2ZmhapLoJE0WtLTkhZK+kaF8VtLujGPf0TSkNqX0qw4G2sDZl1JzQONpB7AvwFHA0OBcZKGtsh2JvBaROwBXA58r7alNCtOlW3ArMuoxxnNKGBhRDwXEWuBG4DjWuQ5DpiSh28GjpCkGpbRrEjVtAGzLkMRUdsFSicCoyPiS/nzycABEXFuWZ55OU9j/vxszrOswvzOAs7KH/cClgOlfP1rOFzr5XWW4Y5SjvYY/kBEDGALVdkGWtbrp7d0uQVo+T1b2zrq9mqXet2miKjpC/gC8MuyzycDP22RZz4wqOzzs0C/Kuc/sx7D9Vx2Rx7uKOVoz/XZ0lc1baAzvNp7u3T1V3feXvW4dNYIDC77PAhY1FoeSQ3A+4AVNSmdWfGqaQNmXUY9As2jwJ6SdpfUCxgLTGuRZxpwah4+EXgg8iGBWRdQTRsw6zIaar3AiFgn6Vzgd0APYHJEzJc0kXRqOQ24CviVpIWkM5mxm7CISXUarueyO/JwRylHe67PFmmtDbTnMmqkXbdLN9Btt1fNbwYwM7PuxT0DmJlZoRxozMysWPW+7W0zbxPcC5idX8uBAN4BXgPWAE8BLwPr82tdztOUh9cBbwN/yenrgSfze5TlXw8szNOVPjeV5WkqSyst652y+bzTYtnl+cqHZwBry5ZReq1uUZaWZSit27pW5ru+bNnrK7yagDdbzK98m61vkV5a77fz57V5uCm/r62wvdeW5X+nLP9zwOtl26mU5y9l26K0rNK8osL8V1TYLgG8VZa+GnixbF6rK2yj0jRv5/dXaK4jtwDP5NeksuHvk/7fshD4Rr3bRYHtbXRb6wlsDdyYxz8CDNnC5Q0Gfk9qk/OB83P6vsB/A08AdwB/1cr0k4ElwLwW6ReT9gulfceYnD6qLG0OcEKL6XoAjwO/LUs7N69vAP3L0ncEbgXmktr13mXjXshln827b5+vuF6tlYUWILgAABGGSURBVAvonec9J2+fb1fYBj8F3iz7fBqwtGx+XyobtxtwT97eC0rfH3AN8HzZNMNz+oSytHmkdte3ze+03pW4HRrB50k7nj8Dh+QNtgq4E/gj6Q9SC0k7s0XARNKPsMtzvkNJO5Wm/MWVgtHfk3ZO63Ol+myuVP+L1KgC+G7+0ks70eE073T3zctYD8widaMTwOeAX+fhccCFefgp0o53HfAqcDDNO+kxef6LgY/n/G/mZazJy/hcXk4AXwF+m9OXAWfn4ZOBe/PwF4Hrcv778vwDOAG4NA+PIe1k1gOfojkwT6I5GFwCrCwbfiXnf43UONcD/1KWfmGuvJEramlnf0mutAFcBDyQt+X/oTnovZiXtZ50i/CkPPwGcFTO92aeb1PO/6E8fi2pIS3Jy7iM1JBK3+lleV5fAF7K0w8Hvprn+3Fg97yddgf6lX3nvUh1Z2i920MB7asH6X9sH2xtPUlt5Wd5eCxw4xYucyCwfx7uQ2rbQ0l3630qp58BfKeV6Q8B9qdyoLmgQv5tgYayZS8pfc5p/0hqs+WBZj9gCCl4lAeay4CL8vBfA/eXjXtX3rL0iuvVWrkAAdvn9J6k4H5g2fxGAr/ivYHmila214PAp/Pw9sC2efga4MSNfFfHkO4KbvM77QqXznrRfLbwJ1LD6E3ayd9A2tEOpPkIfCLwMdIO5hXSl98zf74rz/N14AOkf/I+Tarsu5N2+lsBh+f825AqHKSj5ENyOQB2AqbnfA2kQLKGVAkW57IcmcsWwB9y2V/PZT+aFAhfAQ4iHUG/XFb2FXkZDfnzy8Ceefivad7J98rzWJfX6Z38+kReNqSgvFVOH0a6tfwtUmMd
lcu6d17fdbk863L68WXfwfGkI60mYDtSsHgbOJYUXN4GTqL5LJT8/naedqe8zl8g7dzWkHZcpbJtnYeX5+HX8zxKZZpL+u63IR299SQFmRfzdhpL+i7XA48Bf5OX90HSGVYTqaHvnNf/I3m5a0kHJAfm7+FA4MN5u34kunY3MjXvMioiFkfEY3l4FekAYVfSlYzpOdu9pIPMStNPZxP+dxcRqyNiXf7Ym+a6iaRBpHryyxbTPB4RL1SY3VDg/pznKWCIpJ03UoSK69VauSJ5M6f3pHn/VepH7zLSAdpG5T72GiLi3jzvNyNidTXTZuOAqRvL1BUCzadJX+yHSTvwVaT12jF/Xkf6kt4GeuQvbhVph7OOFJzeRzoCbyIfLQBfztMJOB+4gLSDOogUVNYCp5BOdSHt+F4jBYtepKOMrfJrZS7fVqSzgmNyvpPyK0gBb6tcrr2Az5Aq0PtJZx+75nnslNdlEPCfeT6rSQ2hTy7LF4Cv07zDv5G0c/570hnKGuAcUsBcBZxHc124KC9rNens42JSo704j28AdsnrK1KAVJ7+3rzuPUg76h3z8G9Iwa8XKWDvkdf512XbbmfSWcI2pAB/TF7/P5O+x545fTvSkd62eX2Uv6ez87x7kI4035fz3kMKGAH8Q84L8Iuy+X0Y2CGv2xE575K8HY7Ky941vxaVDb+a3yGdYZWGu5JdSWd4JZXWc0Oe3L7eIH2XWyz33L4fqT3NIx20QKrjgytP1aZzJc2VNFnSjmXLOUDSfNLlq3PKdvA/Iu2011c5/zmkqwtIGkU6uBuUxwVwj6RZuYuhklbXq7VySeohaTapnt4bEY+U1g+YFhGLK5Tt83ndb5ZUWsaHgdcl3SLpcUmX5WBVcmme5nJJW5fPTNK2pP3Jbza2UTp1oMl/djuOtMP4M2kHuC1p59PWEdVfkb7060k7W3j3byGzSH+gC9JR/3+Qjs57kyr9o6Sd0u2kndda0rb8dn4v/VZxMM0V9GRSYzyEVJFmkI4O30cKRGtIO/d3gLtJl4C2J+1kf07zKfOBpB32gxHRixQot8tppaOce4Af07zD/1rOs5R0TfdN4DZSkOmZ867N6T/J690np98PDMjTLsvb6eK8Lg2kgKA8TWPOW/r9ZHAePiKXYw3wA5ovNz6b398mHYWVvrfLSDurHqSG0IMUoM8mXQ5tyOV+Kq/vKuCh/IpczhfzPGblPFuRroM35eH5uTyQgv2Xc1nm5+23C+kAY2uaf8cp1amWw1QY7ioqtaOW61lNnk1fsLQ9aSf21YhYSTrbHC9pFql+rt3EWV5JupQ6nNSufrChsBGPRMQw0hWDb0rqLemzwJKImFVxbpV9F9gxB4HzSFcWSkHrkxGxP+lqxXhJh+T0VterUrlyelNEDCcFsVGS9pa0CylQ/bRCue4g/fbyUdKl8tIZaANpP3VBXsYHSZfZAL5JOkD8GNCXdPBa7hjgjxGx0bPHTh1oSF9YI+myTFNElHbSpUtLA0kbcg1ph9Ek6XTSDnwZaUf4AVJDeT/wv0k7tVGknW7pcs0i0qUWSJeQ7s757gPG0/y7wRLSjmwZqYG8TKo0w0hH/juSzohuzeX6dF7GDsA3aD6in0s6wod0lnASaWe6mvRbiUhH5OS8kM5oSjv4HXP+d0jf8bOkQNSXdJq7mlSht8rrN5G04/6rvNzSby4DSQG2B+lsYbs8PDGPEynI98zzuhT4KM1nlAfn5R6e121b4J/zMhtIAbyBFHgm5vn1yPMZkuezHyl4bUfqJeKGvL7TSIFkPTAzz2NXUkAqdVv0HOmMpBTs/x/NdX7b/H2tJ32/pd9o/of0Y3IP0pHs3TT/vtdICkCl4Z1p7jqmq3YjU5cuoyT1JLWh6yPiFkiXoiLiMxExglSPn92UeUbEq3kHvZ7UfkZVyPMkqQ7tDXwSOFbSC6R6d7ik6zayjJURcXoOAqeQ6u7zedyi/L6EtA8YVe16tShXefrrpN9YRpPayh7A
wlzmbfOf3omI5RHxdp7sF8CIPNwIPJ4vja4jHYDun6dZnC/TvQ1cXWF7jaWKy2bQ+QPNONIOZz9A+brwJ0hHz+tI1zp3IB2lirTT+w7pjpY1pMtTi0lH60E66yj98PwIKUj0In2Ry/I8nqT5KPnvSWc3O+S8M0nbtIH0286APPwW6ZS6L+l3oJ+QvuiHSWdBkYf/kpc/knQZDuCwvLx+eTlLczn65zO6z+Z8K0gBooF05L4PaYe+Js9ja9JlgEWko5ZpuSwinSmsyWX/PWkHvDVpJ1s6Q3yY5iOts2ne4e+blwvpEtXDNJ/d3JbTdwP+b96uu5BuxiBvn9KdYB8h7eQhBZnJefjXpMBYWu5OpMC2B+ngQKSGcQ8pyP2FFIAGkBrGRaTfUoJ0Jlb6XecZUhBtIB11HpiH/z3PrwfpjOnjeVs8mNdtz/z+TF7+k128G5madxmV2/FVwJMR8cOy9J3y+1akA5afbeJ8B5Z9PIF0yYq8bg15+AOkS9cvRMQ3I2JQRAwhrfcDEfHFjSxjh7ydAL4ETI+IlZK2k9Qn59mOtO8pLb/ierVWLkkDJO2Q07ch/db7VET8Z0S8PyKG5DKvjvRMr5brfixpPwbp+91RUqn35sNJd55tmCZ/H8eXypvT3kc66L29re2xwcbuFuioL9IOcDnpx8dVvPvW1/JbjCu9miqktbw9tjx9fRX5Sq+1ZcPrWplfy8/vVBjfWnpT2fqWr0vL254rrX+l9SjdWtxyfVve/lu6Vbv8FuXSDRalALm2wrYvXRorpZVuH15F8+2hpTv11tB8S/PanL6I5t/PyrdN+XQt0ytt39Klu9L3Umkbld+mXXpfSToiXZjfX8rDC4Efki7ZPgt8q95tosC2NqblepLOQI/Nw71JZ6cLSZeEP7iFyzsofx9zKbsVmXQp88/59V2aezbZBbizbPqppAPId0gHPGfm9F+RfuuYSwqOA3P6yaSD0dmkKxfHVyjTobz7rrOv5Hmvy3X0lzn946SDkKdIt8XvmNM/SDrYLN2S/K2yebW2XhXLRTqgejyvxzzgwla2Y/ldZ/8vz2sO6WDyr8vGfTrP6wnSnWa9cvoDOW0e6Q7V7cumOQ24odrv1F3QmJlZoTr7pTMzM+vgHGjMzKxQDjRmZlYoBxozMyuUA42ZmRXKgcbMzArVbQKNpH6SZufXK5JeLvvcq0Xe35X+XNURSHpI0vAK6ZtVTklDJc3JfRsNaSVPg6TXWxl3naTj25j/P5a6ytjIfMZL+rs25nOkpNtaG9+VuH6+azrXzwJIulrSXvVYdkM9FloPEbGc1McRki4m/Znp++V58j9gFRFH1b6Em24Lyvk54OaI+E57lqfMP5L+2b+mrUwR8W8FLb/Tcf18F9fPAkTE6fVadrc5o2mNpD0kzZP0M9K/bwdKaizr4uF0pd5L50i6OqftrNTb6UxJMyQdmNMvkTRF0u8lPSPpjJy+az7qm52X9YlWytIg6VeSnsj5vtJifI98tHZx/tyYu7worcNVkuZLuqt0xFZhGceSeng9R9J9Oe3/5OnnSTqvwjRbSfp3SQsk3UHq36y17fkPpG5i/lCaf07/bt6G/63mLjcukfTVPPxhSQ/kPI+1PJJV6sX2MUlD8nRXSfovSc9JGl+W79T8nczOZd6qte0q6R/yOs3RRvqwqhfXT9fPjdXPvLxrJN0j6QVJx0v6QZ7Xf6q5G5uHJA2vS3uod/cW9XhR9gAkUp9Z64GPlY1vJPUrti+pK4m+Ob30fiP5QUOkfrnm5eFLSDuD3qTKXOp48evA13OeHpR15dCiXAcAd5V93iG/P0Tq/+zG0nxalHMPUncb++T0W4Cxbaz/JaQecSH1BzaH1KVPH1IfSB8lne2+nvOcROoXbStSp4orqdBNR8ty5eHS83KOzp9/SH5KY4tyzAKOycO9c3mOJPWXdjCpH7lBZdP9gdQP3U6kroh6kDocvI3mh0VNAv62je26mObuNnaod710/XT9
3Jz6mZf3X3ldRpD6Biw9yOwO4LNl39PwLV3e5ry6zaWzjXg2Ih6tkH446WmBKwCiuTvsI4G91Pxspx2VOrcDuC0i1gBrJE0ndbH9KPDzfBR3W0TMaaUcC/N8f0x6GNk9ZeOuAn4dEd9rbdqIeCIPzyLtYKpxMPCbyA87UrrmfBC5Y73sEGBqpF5vGyU9WOW8S96KiNJD5WblZW6g9FyQ/hFxB0DefuTtuzepo8tPR8QrZZP9NtKDuJZIWkHqRPNI0vaemafdhtQ32e+ovF3nA9dJup3mDkA7ItdP18+N1c87I2KdpCdyGe/N6U/w3m3d2vdYWHvo9pfOsr+0kl56zkql9FERMTy/do2It/K4lvkjIh4gdcq3GLherfzAGOk6/UdJRx5fIT2HpuSPpCcXbl1pWlInkCVNVP/7W7VPQtySTvHKnxvSWtlam/+iPH3LH5srra+AyWXfy14R8Z02tutRpJ5yR5Eaf/kDnzoS18+N6+71s7S8Uke3lH1+1/rUoz040LTtPmCspL4ApXean0NDTi+vZMdL2lpSf/IptVIX369ExCRS76j7UYFSV92KiP8gdW+/f9noSXm5N5SuubaT6cAJkrZRetDUcaTT/pZ5xubrybuSugdvS+kJplWJiNeAZZKOAVB66NS2efQK0qMQ/lXSwa3NI7sPOClv+9KdXLtV2q65EQ3KO9kJpCPObVubcQfl+tmcx/WzSvVoD7501oaImCvpX4HpktaRTqvPJDXiK5UeotZA6na71LAfJV0vHgxcFBGvKv3o+o+S3iE9UK21Z1oMBq5SOq8OWjzRLiL+VdKlwDWSTmmndZwhaWouN8CVEfFEi53FzaRn2swjPWdnOm2bBNwn6SXSA5mq8XekyzeXko7INjwPPiIWK/1IfGdb653L/e287K1IvwucQzqibLldG4BfK91+uxXwvUjPp+80XD83cP3cNJW+x0Lbgx8T0I4kXQIsi4gf1bssZi25flq9+NKZmZkVymc0dSJpJu+9dPm3EbGgUv7NXMbPSI8oLvfDiLi2neY/jfSY5nIXRMR9lfJb5+H62fFI+hLpP0blpkfEVyrl70gcaMzMrFC+dGZmZoVyoDEzs0I50JiZWaEcaMzMrFD/HzqPEBOszuFmAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "#plt.subplot(121),sns.countplot(x='Triceps_skin_fold_thickness',hue='Target',data=data)\n",
    "#plt.subplot(122),sns.countplot(x='Triceps_skin_fold_thickness_miss',hue='Target',data=data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>serum_insulin_miss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>155.548223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>155.548223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>155.548223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>94.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>168.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   serum_insulin  serum_insulin_miss\n",
       "0            NaN          155.548223\n",
       "1            NaN          155.548223\n",
       "2            NaN          155.548223\n",
       "3           94.0            0.000000\n",
       "4          168.0            0.000000"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#data['serum_insulin_miss'] = data['serum_insulin'].apply(lambda x:np.mean(data['serum_insulin']) if pd.isnull(x) else 0)\n",
    "#data[['serum_insulin','serum_insulin_miss']].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(<matplotlib.axes._subplots.AxesSubplot at 0x6fe0dcd748>,\n",
       " <matplotlib.axes._subplots.AxesSubplot at 0x6fe0dcd748>)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAEHCAYAAAC9TnFRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de7wU9X3/8dc7XAQRIzcNeiRHjdoeNIIcMZeaRLHRkEawmkRab6ghyU/9aav8YppUkV9sbWOitba23rEYlRCjmPozIV5qm1QJKCJIjCj+8AjKxRsGETh8+sfMwnJYzpkDuzu757yfj8c+dua785357O7Mfna+M/MdRQRmZmYd+VDeAZiZWX1wwjAzs0ycMMzMLBMnDDMzy8QJw8zMMumZdwBZDB48OBobG/MOw7qoefPmrY6IIdVertdrq7Ryr9t1kTAaGxuZO3du3mFYFyXp/+exXK/XVmnlXrfdJGVmZpk4YZiZWSZOGGZmlkldHMPoyjZu3EhLSwvr16/PO5Qd6tOnDw0NDfTq1SvvUMyqqh62T6jeNuqEkbOWlhb69+9PY2MjkvIOZzsRwZo1a2hpaeGAAw7IOxyzqqr17ROqu426SSpn69evZ9CgQTW7Mkpi0KBBNf8Py6wSan37hOpuoxVLGJJuk7RS0sKisoGSZkt6MX0eUKnl15NaXhmh9uMzq6R6WP+rFWMl9zDuAE5sU3YZ8EhEHAw8ko6bmVkdqNgxjIh4QlJjm+JxwOfS4WnA48C3KhVDvVqzZg1jxowB4PXXX6dHjx4MGZJcrDlnzhx69+5d9mU+/fTTrFy5khNPbJvjzaxYd94+q33Qe5+IWAEQESsk7b2jCSVNAiYBDBs2rErhVdeoyXcy/ZyjtisfNGgQ8+fPB2DKlCnsscceXHrppZnn29raSo8ePToVy9NPP83ChQtzXyFtq1GT78xlufO+f2Yuy60X3Xn7rNmD3hFxU0Q0R0RzIXsbfOlLX2LUqFEMHz6cW265BYBNmzax11578d3vfpfRo0czZ84cZs2axaGHHsoxxxzDhRdeyPjx4wF47733OPvssxk9ejQjR47kwQcf5P3332fq1KncddddjBgxgpkzZ+b5Fs3qVlffPqu9h/GGpKHp3sVQYGWVl1/3pk2bxsCBA1m3bh3Nzc2ccsop9O/fn3feeYcjjzyS733ve6xbt45DDjmEX/3qVwwbNoyvfOUrW+pPnTqVE088kTvuuIO33nqLo48+mgULFnD55ZezcOFCrrvuuhzfnVl96+rbZ7X3MGYBZ6XDZwEPVHn5de/aa6/liCOO4JOf/CQtLS289NJLAPTu3ZuTTz4ZgOeff55DDz2Uj370o0hiwoQJW+r/4he/4KqrrmLEiBEce+yxrF+/nmXLluXyXsy6mq6+fVZsD0PS3SQHuAdLagGuAK4GZkg6F1gGfLlSy++KfvnLX/LEE0/w5JNP0rdvX/7oj/5oy7nXffv23XJqXUTscB4Rwf33389BBx20TfkTTzxRucDNuoHusH1WbA8jIiZExNCI6BURDRFxa0SsiYgxEXFw+vxmpZbfFb3zzjsMHDiQvn37smjRIn7zm9+UnG748OG88MILvPrqq0QE995775bXTjjhBK6//vot48888wwA/fv3Z+3atZV9A2ZdWHfYPmv2oLdt74tf/CLr1q3jiCOOYOrUqRx99NElp9t999254YYbOP744znmmGPYd999+fCHPwzAFVdcwbp16zj88MMZPnw4U6ZMAeC4447j2WefZeTIkT7obbYTusP26b6kalxhhYGkg7Gf//znJad7++23txk//vjjeeGFF4gIvv71r9Pc3AxAv379uPnmm7erP2TIEN/Mx6yTutv26T2MLurGG29kxIgRNDU18f777/O1r30t75DqiqT9JT0mabGkRZIuSsunSHpN0vz0MbaozrclLZH0gqQT8oveal29bp/e
w+iiJk+ezOTJk/MOo55tAi6JiKcl9QfmSZqdvnZtRFxTPLGkJuA0YDiwL/BLSYdERGtVo7a6UK/bp/cwzEqIiBUR8XQ6vBZYDOzXTpVxwD0R8UFELAWWAKMrH6lZ9ThhmHUg7RNtJPBUWnSBpAVpj8yFHpf3A14tqtZCiQQjaZKkuZLmrlq1qoJRm5WfE4ZZOyTtAfwEuDgi3gVuBA4CRgArgB8UJi1RfbsT7t3ljdUzJwyzHZDUiyRZ3BUR9wFExBsR0RoRm4Gb2drs1ALsX1S9AVhezXjNKs0HvWtMuXsozdLz6MMPP8xFF11Ea2sr5513Hpdd5tuUKLks91ZgcUT8sKh8aKHHZeBkoHCDsFnAjyT9kOSg98HAnCqGbFWQx/YJtbONOmF0c62trZx//vnMnj2bhoYGjjrqKE466SSampryDi1vnwbOAJ6TND8t+ytggqQRJM1NrwBfB4iIRZJmAM+TnGF1vs+QsnKopW3UCaObmzNnDh/72Mc48MADATjttNN44IEHun3CiIj/ovRxiYfaqXMVcFXFgrJuqZa2UR/D6OZee+019t9/a9N7Q0MDr732Wo4RmVmxWtpGnTC6uVI9Z9bDTe/Nuota2kadMLq5hoYGXn116+UDLS0t7LvvvjlGZGbFamkbdcLo5o466ihefPFFli5dyoYNG7jnnns46aST8g7LzFK1tI36oHeNyXqaXbn07NmTG264gRNOOIHW1lbOOecchg8fXtUYzOpFtbdPqK1t1AnDGDt2LGPHju14QrMaVu5rJAC+f/IfEq+u7nC6pv0Hl33ZxWplG3WTlJmZZeKEYWZmmThhmJlZJk4YZmaWiROGmZll4oRhZmaZ+LTaGrNs6uFlnd+wy5/rcJpzzjmHn/3sZ+y9994sXLiww+nNuqs9bj+2ZPmynZxfvW2f3sMwzj77bB5++OG8wzCzEmpp+3TCMD7zmc8wcODAvMMwsxJqaft0wjAzs0ycMMzMLBMnDDMzy8QJw8zMMvFptTUmy2l25TZhwgQef/xxVq9eTUNDA1deeSXnnntu1eMwq3XvTXysZHkle6utpe3TCcO4++678w7BzHaglrbPXJqkJP2FpEWSFkq6W1KfPOIwM7Psqp4wJO0H/G+gOSIOA3oAp1U7DjMz65y8Dnr3BPpK6gnsDizPKY6aEBF5h9CuWo/PrFIi6mP9r1aMVU8YEfEacA1J9ysrgHci4hdtp5M0SdJcSXNXrVpV7TCrpk+fPqxZs6ZmV8qIYM2aNfTp41ZD635a3l7PhnVra3b7hOpuo1U/6C1pADAOOAB4G/ixpNMjYnrxdBFxE3ATQHNzc+1+W7uooaGBlpYWajkp9unTh4aGhrzDMKu62/97GROBhr36IO14Or2X7/ZbrW00j7OkjgeWRsQqAEn3AZ8Cprdbq4vq1asXBxxwQN5hmFkJaz9o5frHl3Y43bzvn1mFaPKXxzGMZcAnJO0uScAYYHEOcZiZWSfkcQzjKWAm8DTwXBrDTdWOw8zMOieXC/ci4grgijyWbWZmO8d9SZmVIGl/SY9JWpxeZHpRWj5Q0mxJL6bPA9JySbpe0hJJCyQdme87MCs/Jwyz0jYBl0TEHwKfAM6X1ARcBjwSEQcDj6TjAF8ADk4fk4Abqx+yWWU5YZiVEBErIuLpdHgtyYkZ+5GcEj4tnWwaMD4dHgfcGYkngb0kDa1y2GYV5YRh1gFJjcBI4Clgn4hYAUlSAfZOJ9sPeLWoWkta1nZe3eKCVOuanDDM2iFpD+AnwMUR8W57k5Yo2+6C04i4KSKaI6J5yJAh5QrTrCqcMMx2QFIvkmRxV0Tclxa/UWhqSp9XpuUtwP5F1Rvo5n2kWdfjhGFWQnpR6a3A4oj4YdFLs4Cz0uGzgAeKys9Mz5b6BEkfaSuqFrBZFfgGSmalfRo4A3hO0vy07K+Aq4EZks4l6bXgy+lrDwFjgSXAOmBidcM1qzwnDLMSIuK/KH1cApLubNpOH8D5FQ3K
LGdukjIzs0ycMMzMLBMnDDMzy8QJw8zMMnHCMDOzTJwwzMwsEycMMzPLxAnDzMwyccIwM7NMnDDMzCwTJwwzM8vECcPMzDJxwjAzs0ycMMzMLBMnDDMzy8QJw8zMMnHCMDOzTJwwzMwsEycMMzPLxAnDzMwyccIwM7NMnDDMzCwTJwwzM8skl4QhaS9JMyX9VtJiSZ/MIw4zM8uuZ07L/Qfg4Yg4VVJvYPec4jAzs4yqnjAk7Ql8BjgbICI2ABuqHYeZmXVOHk1SBwKrgNslPSPpFkn9cojDzMw6IY+E0RM4ErgxIkYCvwcuazuRpEmS5kqau2rVqmrHWHGjJt+ZdwhmZp2SR8JoAVoi4ql0fCZJAtlGRNwUEc0R0TxkyJCqBmhmZturesKIiNeBVyUdmhaNAZ6vdhxmZtY5eV2HcSFwl6QFwAjgb3KKw2yHJN0maaWkhUVlUyS9Jml++hhb9Nq3JS2R9IKkE/KJ2qxyMiUMSY9kKcsqIuanzU0fj4jxEfHWzs7LrCNjxozJVFbCHcCJJcqvjYgR6eMhAElNwGnA8LTOP0vqsbMxm9Widk+rldSH5BqJwZIGAEpf2hPYt8Kxme2S9evXs27dOlavXs1bb71FRADw7rvvsnz58g7rR8QTkhozLm4ccE9EfAAslbQEGA38904Fb1aDOroO4+vAxSTJYR5bE8a7wD9VMC6zXfav//qvXHfddSxfvpxRo0ZtSRh77rkn559//q7M+gJJZwJzgUvSPeT9gCeLpmlJy7YhaRIwCWDYsGG7EoNZ1bXbJBUR/xARBwCXRsSBEXFA+jgiIm6oUoxmO+Wiiy5i6dKlXHPNNbz88sssXbqUpUuX8uyzz3LBBRfs7GxvBA4iOfa2AvhBWq4S08Z2BT77z+pYpiu9I+IfJX0KaCyuExG+mMBq3oUXXsivf/1rXnnlFTZt2rSl/Mwzz+z0vCLijcKwpJuBn6WjLcD+RZM2AB23e5nVkUwJQ9K/kfyrmg+0psUBOGFYzTvjjDN46aWXGDFiBD16JMehJe1UwpA0NCJWpKMnA4UzqGYBP5L0Q5Im3IOBObscvFkNydqXVDPQFIVGYLM6MnfuXJ5//nmkUq1GOybpbuBzJCd9tABXAJ+TNILkD9MrJMf5iIhFkmaQXFO0CTg/IlpLzdesXmVNGAuBj5C02ZrVlcMOO4zXX3+doUOHdqpeREwoUXxrO9NfBVzVyfDM6kbWhDEYeF7SHOCDQmFEnFSRqMzKaPXq1TQ1NTF69Gh22223LeWzZs3KMSqz+pM1YUypZBBmlTRlypS8QzDrErKeJfUflQ7ErFI++9nP5h2CWZeQ9SyptWw9p7w30Av4fUTsWanAzMqlf//+Ww54b9iwgY0bN9KvXz/efffdnCMzqy9Z9zD6F49LGk/S7YFZzVu7du024/fffz9z5viMVyufZVMPz2W5wy5/rqrL26neaiPifuC4MsdiVhXjx4/n0UcfzTsMs7qTtUnqT4tGP0RyXYavybC6cN99920Z3rx5M3Pnzu30NRlmlv0sqS8VDW8iuWBpXNmjMauABx98cMtwz549aWxs5IEHHsgxIrP6lPUYxsRKB1JJoybfybzvd74bCOsabr/99rxDMOsSst5AqUHST9O7j70h6SeSGiodnFk5tLS0cPLJJ7P33nuzzz77cMopp9DS0pJ3WGZ1J2uT1O3Aj4Avp+Onp2V/XImgzMpp4sSJ/Nmf/Rk//vGPAZg+fToTJ05k9uzZOUdWm/I64weqf9aPdU7Ws6SGRMTtEbEpfdwBuDN/qwurVq1i4sSJ9OzZk549e3L22WezatWqvMMyqztZE8ZqSadL6pE+TgfWVDIws3IZPHgw06dPp7W1ldbWVqZPn86gQYPyDsus7mRNGOcAXwFeJ+mx9lSgrg+EW/dx2223MWPGDD7ykY8wdOhQZs6c6QPhZjsh6zGM/wucld67GEkDgWtIEolZTfvrv/5rpk2bxoABAwB48803ufTS
S7nttttyjsysvmTdw/h4IVkARMSbwMjKhGRWXgsWLNiSLAAGDhzIM888k2NEZvUpa8L4kKQtW1y6h5F178QsV5s3b+att7b83+HNN9/c5t7eZpZN1h/9HwC/ljSTpEuQr+A7i1mduOSSS/jUpz7FqaeeiiRmzJjBd77znbzDMqs7Wa/0vlPSXJIOBwX8aUQ8X9HIzMrkzDPPpLm5mUcffZSI4L777qOpqSnvsMzqTuZmpTRBOElYXWpqanKSMNtFO9W9uZmZdT9OGGZmlokThpmZZeKEYWZmmThhmJlZJk4YZmaWSW4JI+319hlJP8srBjMzyy7PPYyLgMU5Lt/MzDohl4SR3t71i8AteSzfzMw6L689jOuA/wNs3tEEkiZJmitpbrnvjjZq8p1lnV+9y/OWnGZWP6qeMCT9CbAyIua1N11E3BQRzRHRPGSI7wZr1SfpNkkrJS0sKhsoabakF9PnAWm5JF0vaYmkBZKOzC9ys8rIYw/j08BJkl4B7gGOkzQ9hzjMOnIHcGKbssuARyLiYOCRdBzgC8DB6WMScGOVYjSrmqonjIj4dkQ0REQjcBrwaEScXu04zDoSEU8Ab7YpHgdMS4enAeOLyu+MxJPAXpKGVidSs+rwdRhmnbNPRKwASJ/3Tsv3A14tmq4lLdtGJY/NmVVargkjIh6PiD/JMwazMlGJstiuwMfmrI55D8Osc94oNDWlzyvT8hZg/6LpGoDlVY7NrKKcMMw6ZxZwVjp8FvBAUfmZ6dlSnwDeKTRdmXUVme+4Z9bdSLob+BwwWFILcAVwNTBD0rnAMuDL6eQPAWOBJcA6YGLVAzarMCcMsx2IiAk7eGlMiWkDOL+yEZnly01SZmaWiROGmZll4oRhZmaZOGGYmVkmThhmZpaJE4aZmWXihGFmZpk4YZiZWSZOGGZmlokThpmZZeKEYWZmmXSbhDFq8p1dYvl5vw8z6766TcIwM7Nd44RhZmaZOGGYmVkmThhmZpaJE4aZmWXihGFmZpk4YZiZWSZOGGZmlokThpmZZeKEYWZmmThhmJlZJk4YZmaWiROGmZll4oRhZmaZOGGYmVkmThhmZpZJ1ROGpP0lPSZpsaRFki6qdgxmZtZ5PXNY5ibgkoh4WlJ/YJ6k2RHxfA6xmJlZRlVPGBGxAliRDq+VtBjYD3DCsLoh6RVgLdAKbIqIZkkDgXuBRuAV4CsR8VZeMZqVW67HMCQ1AiOBp0q8NknSXElzV61aBZS+n/XO3uO6bb2s8+7M8pZNPbzTcbWts6PltTfv9uoU18v6Xkotq7PvLc97ke/M95DRsRExIiKa0/HLgEci4mDgkXTcrMvILWFI2gP4CXBxRLzb9vWIuCkimiOieciQIdUP0KzzxgHT0uFpwPgcYzEru1wShqReJMniroi4L48YzHZRAL+QNE/SpLRsn7TJtdD0undu0ZlVQNWPYUgScCuwOCJ+WO3lm5XJpyNiuaS9gdmSfpulUppcJgEMGzaskvGZlV0eexifBs4AjpM0P32MzSEOs50WEcvT55XAT4HRwBuShgKkzytL1HNTq9WtPM6S+i9A1V6uWblI6gd8KD3Lrx/weWAqMAs4C7g6fX4gvyjNyi+P6zDM6t0+wE+T1lV6Aj+KiIcl/QaYIelcYBnw5RxjNCs7JwyzToqIl4EjSpSvAcZUPyKz6nBfUmZmlokThpmZZeKEYWZmmThhmJlZJk4YZmaWiROGmZll4oRhZmaZOGGYmVkmThhmZpaJE4aZmWXihGFmZpk4YZiZWSZOGGZmlkldJIwNKxZtGR41+c6Sw8VlpcqzKNRbNvXwbcoL41mXtzPL39G8C8svjqnU8trGXNBe7O3ZUb3i8eJpimNoL+62cbatVxgv9X6Lp+voM9lRWXvvzczaVxcJw8zM8ueEYWZmmThhmJlZJk4YZmaWiROGmZll4oRhZmaZOGGYmVkmThhmZpaJE4aZmWXihGFmZpk4YZiZWSZOGGZmlokThpmZ
ZeKEYWZmmThhmJlZJk4YZmaWiROGmZllkkvCkHSipBckLZF0WR4xmFWC123ryqqeMCT1AP4J+ALQBEyQ1FTtOMzKzeu2dXV57GGMBpZExMsRsQG4BxiXQxxm5eZ127o0RUR1FyidCpwYEeel42cAR0fEBW2mmwRMSkcPA14verk/sLad8axl5ZqmXurVYkw7W6+c8x4UEf3ZRVnW7Tbr9aHAC7u63AoYDKzOO4g6U6uf2UcjYki5ZtazXDPqBJUo2y5rRcRNwE0Akua2eXkIsLSd8axl5ZqmXurVYkw7W6+c8y7Xht7hul28XtcqSXMjojnvOOpJd/nM8miSagH2LxpvAJbnEIdZuXndti4tj4TxG+BgSQdI6g2cBszKIQ6zcvO6bV1a1ZukImKTpAuAnwM9gNsiYlEH1druwh8D/Gc741nLyjVNvdSrxZh2tl45510WO7lu16KabjKrUd3iM6v6QW8zM6tPvtLbzMwyccIwM7NsIiK3B3Ab8D6wuejRmj6HH35U+bG5zWMjyfpYeP0D4HrSptx6fwAnklwHsgS4rMTruwH3pq8/BTRWYPtfCSwsKpsCvAbMTx9j0/JGkt+KQvm/7GCeJesXvT4MeA+4tKjsL4BFwELgbqBPWn5X+vksTGPtlZb/ObAgffwaOCIt7wPMAZ5N53dl0TJuTcsXADOBPdLys4FVRfGeV1TnLODF9HFWWrY78O/Ab9NlXF00fcl5AR8F5qVli4BvFNWZADyXxvUwMLjd7yznFfYzwLkkpyMuT7/opcD96cYawDvphrqWbTfqvH9c/Kj/x6ai4UJyaAVeBdalr7cCvydZN98Gfgd8Ie8f+zJsez2Al4ADgd4kP2ZNbab5X6Q/zCRnfN1bge3/SLZPGJeWmLaxeLp25lmyftHrPwF+XJgG2I/kN6dvOj4DODsdHktybY1IEsk30/JPAQPS4S8AT6XDYmsi6EWSZD+Rju9ZFMMPSRM0yY/8DSXiHAi8nD4PSIcHkCSMY9NpepOcwPGFDubVG9gtHd4DeAXYl+Skp5WkSQL4e2BKe59vrk1SEfEE8AjJBrmWJPP/HjgEWE+yIa8m2ZgLZ3SVujhqh4vIMM2GXaxv9Uts/Y43kzTRrifZmN5n23VNJIlkd2B8FWOslCzdmIwDpqXDM4Exkjqz/bUr3f7fLNf8OiJpPMkPb9sz13oCfSX1JPl+l6fxPRQpkj2HhrT81xHxVlr3yaLyiIj30vJe6SPS195NYxDQl45/W04AZkfEm+myZpP0IrAuIh5L57kBeLqw/B2JiA0R8UE6uhtbD0UUkmG/NK496eC6oVo8htGfZI9iD5I3cyDQj2R3ryDrSptlut67WN/q14fY+h0XNu71adnuaXlhb7bQdcjuJP9K691+JAmwoIXt39eWaSJiE8ne/qAqxHaBpAWSbpM0oKj8AEnPSPoPScd0pr6kfsC3gCuLJ4yI14BrgGXACuCdiPhF8TSSegFnkDTZtHUu8P+Kpu0haT7JP/fZEfFU0Wu3k3Rx9AfAPxbN45Q03pmSChd+dvj9SNoL+BLJn+725oWk/SUtSOf5dxGxPCI2At8kaZJaTtJh5q0l3uMWtZYwRNJ1w24kexWQNA0UsuPmEnW8F2DlsJGt/7jaU2jCqndZuujJ1I1Pmd0IHASMIPkB/0FavgIYFhEjgb8EfiRpz07UvxK4tmgPAIA0oYwDDiDZs+wn6fQ28/xn4ImI+M82dY8lSRjfKpRFRGtEjCD51z9a0mFFr01Ml7EY+Gpa/CDJsaGPA79k6x5du599ujd0N3B9RLzcwbyIiFfT8o8BZ0naJ02E3wRGpnEtAL5dYrlb1FrC6EWyJ3EwW5ugNhUNl4p3V/cCSiWhgq7ww2DZFPY0C80F69Lxwl5IocPC9+ka3X1k6cZkyzTpD9SHqXATUkS8kf7obgZuJmk6IyI+iIg16fA8kuMvh2StDxwN/L2kV4CLgb9KL7I8HlgaEavSf9z3kRyjAEDS
FSR/Yv+yeDmSPg7cAowrxNUmjreBx0lOLCgubyU5keCUdHxNUXPRzcCodLij7+cm4MWIuK5o3juaV/Hyl5M0yR1DklSJiJfSZrcZxe+9lFpLGANJjiksIznaHyRZTyQHHzeytYmgXNr7DNwk1TVkWV82pc+9Sf6Ztm1nDpIzbNYBD5Q1unxk6cZkFsmZOgCnAo+mPywVI2lo0ejJJGcoIWlIer8RJB1I8qfy5az1I+KYiGiMiEbgOuBvIuIGkt+aT0jaPW3HH0OyB4Ck80iOJUxIE1BhGcNIEssZEfG7ovIhaTMRkvqSJKPfKvGxtFwkzUi/LRHvSYVlk/QW8HlJA9K9oM+nZUj6Hknyvrid975lXpIa0ngKe1SfJjn76zWgSVKhN9s/Llp+aR2ddVDJB8ku1fvkf7aMH34E2599VzhLqjD+AXADXee02rEkZ329BHwnLZsKnJQO9yE5o2gJyUHfAyuw/a8g+SPYQtK8829sPc1zFjA0nfYUkn/Gz5Ic6P1S0XxuAZrT4ZL12yx3CtueVnslyQ/4wrR+4YyiTelnUzhN9fKi5b1VVD43Lf848Ey67IVF038I+FUa10KS03X3TF/726L39RjwB0VxnZN+9kuAiWlZQ7ouLmb702dLzoskESxg62m9k4qW8Y10XgtImrQGtfeduWsQMzPLpNaapMzMrEY5YZiZWSZOGGZmlokThpmZZeKEYWZmmThhmJlZJk4YXZikb0g6s8zzvEPSqenwLZKayjl/6z7qff2U9FDhQr3uour39LbtSeoZSeduZRUR/1LuebaZ/3mVnL/VBq+fO5z/2ErOvxZ5D6OMJPWT9O+SnpW0UNJXJY1Ke9ecJ+nnhcv3JT0u6W8k/QdwUfE/o/T199Lnz6X1Z0j6naSrJf25pDmSnpN0UDvxTJF0adHy/i6t97tCb5+Shqdl89NeLg+W1ChpYdF8LpU0pcT8H5fUXIhX0lXpe39S0j7l+VStXLx+Zls/0/d6o6THJL0s6bNKer5dLOmOoulekTS41Oeavn61pOfTuK/J+DXVNCeM8joRWB4RR0TEYSTdIf8jcGpEjCK5a9dVRdPvFRGfjYgflJhXsSOAi4DDSbpZPiQiRpN0UXBhJ+Lrmda7GLgiLYXJeAIAAAIwSURBVPsG8A+R9LDZTNJFw87oBzwZEUcATwBf28n5WOV4/cy+fg4AjiO5G9+DwLXAcOBwSSPaTLvd5yppIElfVsMj6SX2ezsZd01xwiiv54Dj039Kx5D0NnkYMFtJH/nfZdubndybcb6/iYgVkfRE+RJQ6K//OZI7kWV1X/o8r6jef5P03Pkt4KMR8X4n5ldsA/CzEvO32uH1c/v578iDkfSb9BzwRkQ8F0kHhItK1N3mc42Id4B3Se6tcoukP2Vr78d1zQmjjCLpuXIUyQr0t6QdpkXEiPRxeER8vqjK74uGN5F+H5LEtjd2+qBoeDPb3h+kM8ehCvVaC/Ui4kckPVu+D/xc0nHFsaSKb161Ixtja8dkW+ZvtcPrZ6fWz+L30Pb9bVO37ecq6fL0mM9oklvCjqf0zZfqjhNGGUnaF1gXEdNJ7uJ1NDBE0ifT13tJGr6D6q+wtf/6cST3Bqk4JV1FvxwR15P07vlx4A1gb0mDJO0G/Ek1YrHK8vpZsRjbfq5HStoD+HBEPETSxNa2Gasu+V9geR0OfF/SZpIum79J8m/oekkfJvm8r2P7ewpDcsOTByTNYet9zqvhq8DpkjaS3D5yakRslDSV5Cb2S0n77re65/WzMkp9rv1JPq8+JPfV+Ysc4ysbd29uZmaZuEnKzMwycZNUFyDpO8CX2xT/OCKuKjW9WTXV6vpZq3HVMjdJmZlZJm6SMjOzTJwwzMwsEycMMzPLxAnDzMwy+R89ttCJ5jc48QAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "#plt.subplot(121),sns.countplot(x='serum_insulin',hue='Target',data=data)\n",
    "#plt.subplot(122),sns.countplot(x='serum_insulin_miss',hue='Target',data=data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "pregnants                       0\n",
      "Plasma_glucose_concentration    0\n",
      "blood_pressure                  0\n",
      "Triceps_skin_fold_thickness     0\n",
      "serum_insulin                   0\n",
      "BMI                             0\n",
      "Diabetes_pedigree_function      0\n",
      "Age                             0\n",
      "Target                          0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Per-column medians, used below as the fill value for each column.\n",
    "medians = data.median()\n",
    "# Replace every remaining NaN with the median of its own column.\n",
    "data = data.fillna(value=medians)\n",
    "# Sanity check: print the per-column count of values that are still missing.\n",
    "print(data.isna().sum())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 数据标准化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import MinMaxScaler\n",
    "\n",
    "# Split the frame into the label column and the predictor columns.\n",
    "y_label = data[\"Target\"]\n",
    "x_label = data.drop(columns=[\"Target\"])\n",
    "\n",
    "# Rescale every predictor column into the [0, 1] range.\n",
    "mm_scaler = MinMaxScaler(feature_range=(0, 1))\n",
    "X_train = mm_scaler.fit_transform(x_label)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 将数据存入.CSV文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>763</td>\n",
       "      <td>0.588235</td>\n",
       "      <td>0.367742</td>\n",
       "      <td>0.530612</td>\n",
       "      <td>0.445652</td>\n",
       "      <td>0.199519</td>\n",
       "      <td>0.300613</td>\n",
       "      <td>0.039710</td>\n",
       "      <td>0.700000</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>764</td>\n",
       "      <td>0.117647</td>\n",
       "      <td>0.503226</td>\n",
       "      <td>0.469388</td>\n",
       "      <td>0.217391</td>\n",
       "      <td>0.133413</td>\n",
       "      <td>0.380368</td>\n",
       "      <td>0.111870</td>\n",
       "      <td>0.100000</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>765</td>\n",
       "      <td>0.294118</td>\n",
       "      <td>0.496774</td>\n",
       "      <td>0.489796</td>\n",
       "      <td>0.173913</td>\n",
       "      <td>0.117788</td>\n",
       "      <td>0.163599</td>\n",
       "      <td>0.071307</td>\n",
       "      <td>0.150000</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>766</td>\n",
       "      <td>0.058824</td>\n",
       "      <td>0.529032</td>\n",
       "      <td>0.367347</td>\n",
       "      <td>0.239130</td>\n",
       "      <td>0.133413</td>\n",
       "      <td>0.243354</td>\n",
       "      <td>0.115713</td>\n",
       "      <td>0.433333</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>767</td>\n",
       "      <td>0.058824</td>\n",
       "      <td>0.316129</td>\n",
       "      <td>0.469388</td>\n",
       "      <td>0.260870</td>\n",
       "      <td>0.133413</td>\n",
       "      <td>0.249489</td>\n",
       "      <td>0.101196</td>\n",
       "      <td>0.033333</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "763   0.588235                      0.367742        0.530612   \n",
       "764   0.117647                      0.503226        0.469388   \n",
       "765   0.294118                      0.496774        0.489796   \n",
       "766   0.058824                      0.529032        0.367347   \n",
       "767   0.058824                      0.316129        0.469388   \n",
       "\n",
       "     Triceps_skin_fold_thickness  serum_insulin       BMI  \\\n",
       "763                     0.445652       0.199519  0.300613   \n",
       "764                     0.217391       0.133413  0.380368   \n",
       "765                     0.173913       0.117788  0.163599   \n",
       "766                     0.239130       0.133413  0.243354   \n",
       "767                     0.260870       0.133413  0.249489   \n",
       "\n",
       "     Diabetes_pedigree_function       Age  Target  \n",
       "763                    0.039710  0.700000       0  \n",
       "764                    0.111870  0.100000       0  \n",
       "765                    0.071307  0.150000       0  \n",
       "766                    0.115713  0.433333       1  \n",
       "767                    0.101196  0.033333       0  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rebuild a labelled DataFrame from the scaled array and save it together with the target.\n",
    "# The misspelled name featrue_name is kept because the TF-IDF cell further down reads it.\n",
    "featrue_name = x_label.columns\n",
    "# Reuse x_label's index: pd.concat(axis=1) joins on index labels, not on row position,\n",
    "# so a fresh 0..n-1 index would misalign rows with y_label if data had any other index.\n",
    "X_train = pd.DataFrame(data=X_train, index=x_label.index, columns=featrue_name)\n",
    "train = pd.concat([X_train, y_label], axis=1)\n",
    "# index=False keeps the row index out of the CSV; the column header row is still written.\n",
    "train.to_csv('FE_pima-indians-diabetes.csv', index=False)\n",
    "train.tail()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.对数据进行TF-IDF变换\n",
    "- TF-IDF可以突出对特定类别有贡献的低频词"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants_tfid</th>\n",
       "      <th>Plasma_glucose_concentration_tfid</th>\n",
       "      <th>blood_pressure_tfid</th>\n",
       "      <th>Triceps_skin_fold_thickness_tfid</th>\n",
       "      <th>serum_insulin_tfid</th>\n",
       "      <th>BMI_tfid</th>\n",
       "      <th>Diabetes_pedigree_function_tfid</th>\n",
       "      <th>Age_tfid</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.344840</td>\n",
       "      <td>0.567893</td>\n",
       "      <td>0.414553</td>\n",
       "      <td>0.257929</td>\n",
       "      <td>0.112918</td>\n",
       "      <td>0.267243</td>\n",
       "      <td>0.198404</td>\n",
       "      <td>0.443473</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.106143</td>\n",
       "      <td>0.413469</td>\n",
       "      <td>0.669905</td>\n",
       "      <td>0.374274</td>\n",
       "      <td>0.208540</td>\n",
       "      <td>0.269209</td>\n",
       "      <td>0.182207</td>\n",
       "      <td>0.282419</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.450236</td>\n",
       "      <td>0.743245</td>\n",
       "      <td>0.338285</td>\n",
       "      <td>0.198449</td>\n",
       "      <td>0.110573</td>\n",
       "      <td>0.086664</td>\n",
       "      <td>0.210207</td>\n",
       "      <td>0.164720</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.114003</td>\n",
       "      <td>0.487413</td>\n",
       "      <td>0.719514</td>\n",
       "      <td>0.292357</td>\n",
       "      <td>0.161429</td>\n",
       "      <td>0.340778</td>\n",
       "      <td>0.063800</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.458579</td>\n",
       "      <td>0.124783</td>\n",
       "      <td>0.232915</td>\n",
       "      <td>0.141469</td>\n",
       "      <td>0.390196</td>\n",
       "      <td>0.721221</td>\n",
       "      <td>0.165710</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pregnants_tfid  Plasma_glucose_concentration_tfid  blood_pressure_tfid  \\\n",
       "0        0.344840                           0.567893             0.414553   \n",
       "1        0.106143                           0.413469             0.669905   \n",
       "2        0.450236                           0.743245             0.338285   \n",
       "3        0.114003                           0.487413             0.719514   \n",
       "4        0.000000                           0.458579             0.124783   \n",
       "\n",
       "   Triceps_skin_fold_thickness_tfid  serum_insulin_tfid  BMI_tfid  \\\n",
       "0                          0.257929            0.112918  0.267243   \n",
       "1                          0.374274            0.208540  0.269209   \n",
       "2                          0.198449            0.110573  0.086664   \n",
       "3                          0.292357            0.161429  0.340778   \n",
       "4                          0.232915            0.141469  0.390196   \n",
       "\n",
       "   Diabetes_pedigree_function_tfid  Age_tfid  Target  \n",
       "0                         0.198404  0.443473       1  \n",
       "1                         0.182207  0.282419       0  \n",
       "2                         0.210207  0.164720       1  \n",
       "3                         0.063800  0.000000       0  \n",
       "4                         0.721221  0.165710       1  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Apply a TF-IDF re-weighting to the scaled feature table and attach the target column.\n",
    "from sklearn.feature_extraction.text import TfidfTransformer\n",
    "# NOTE(review): TfidfTransformer is documented for term-count matrices; here it is fed\n",
    "# min-max-scaled numeric features - confirm this transformation is intended.\n",
    "tfid_form = TfidfTransformer()\n",
    "# .toarray() converts the result of fit_transform into a dense NumPy array.\n",
    "X_train_tfid = tfid_form.fit_transform(X_train).toarray()\n",
    "# Same column names as the scaled features, with a '_tfid' suffix appended to each.\n",
    "featrue_name_tfid = featrue_name + '_tfid'\n",
    "# Reuse y_label's index: pd.concat(axis=1) joins on index labels, not on row position,\n",
    "# so the rows must carry the same labels as y_label to stay aligned.\n",
    "train = pd.DataFrame(data=X_train_tfid, index=y_label.index, columns=featrue_name_tfid)\n",
    "train_tfid = pd.concat([train, y_label], axis=1)\n",
    "train_tfid.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the TF-IDF feature table to CSV, leaving the row index out of the file.\n",
    "train_tfid.to_csv(path_or_buf='FE_Tfid_pima-indians-diabetes.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
